Pacotes necessários:
#if(!require(PSF)) install.packages("PSF")
#if(!require(timetk)) remotes::install_github("business-science/timetk")
if(!require(EflowStats)) remotes::install_github("USGS-R/EflowStats")
pacotes <- c(
"here",
"usethis",
"data.table",
"HEobs",
# "PSF",
"tidyverse",
"lubridate",
"fs",
"checkmate",
# "xts",
# "hydroGOF",
# "ModelMetrics",
# "forecast",
# "timetk",
"EflowStats",
"hydrostats",
#"NbClust",
# "cluster",
# "cluster.datasets",
"cowplot",
# "clValid",
"ggfortify",
#"clustree",
#"dendextend",
#"factoextra",
#"FactoMineR",
#"corrplot",
#"GGally",
#"ggiraphExtra",
"kableExtra",
"tidymodels",
"fasstr"
)
# Carregar os pacotes
easypackages::libraries(pacotes)
Scripts:
source(here('R', 'load-data.R'))
source(here('R', 'utils.R'))
source(here('R', 'prep-stn-data.R'))
Os dados importados de vazão devem ser regularmente espaçados no tempo. Esta adequação das séries diárias, se necessária, pode ser realizada com a função complete_dates() do pacote {lhmetools}. Assim assegura-se que todas estações possuam 1 observação por dia e sem datas faltantes.
Metadados
data_link <- "https://www.dropbox.com/s/d40adhw66uwueet/VazoesNaturaisONS_D_87UHEsDirceuAssis_2018.dat?dl=1"
qnat_meta <- extract_metadata(NA_character_, informative = TRUE)
glimpse(qnat_meta)
## Rows: 87
## Columns: 5
## $ estacao_codigo <dbl> 18, 237, 215, 119, 190, 32, 14, 247, 1, 216, 191, 149, …
## $ latitude <dbl> -19.918751, -22.636078, -27.776389, -23.811460, -6.7985…
## $ longitude <dbl> -49.92053, -48.27274, -51.18944, -46.52390, -43.92756, …
## $ nome_estacao <chr> "A. VERMELHA", "BARRA BONITA", "BARRA GRANDE", "BILLING…
## $ municipio <chr> "A. VERMELHA", "BARRA BONITA", "BARRA GRANDE", "BILLING…
Dados
qnat_data <- qnat_dly_ons() %>%
select(date, qnat, code_stn) %>%
lhmetools::complete_dates(group = "code_stn")
glimpse(qnat_data)
## Rows: 2,796,267
## Columns: 3
## $ date <date> 1931-01-02, 1931-01-03, 1931-01-04, 1931-01-05, 1931-01-06, …
## $ code_stn <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ qnat <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Incluindo nome da estacao
qnat_data <- qnat_data %>%
full_join(select(qnat_meta, estacao_codigo, nome_estacao),
by = c(code_stn = "estacao_codigo")
) %>%
rename("name_stn" = nome_estacao)
Preparação dos dados para aplicação da função calc_magnifSeven com a opção de ano hidrológico (water year) para o posto 74 da ONS (G. B. Munhoz).
qnat_posto <- qnat_data %>%
sel_station(.,station = 74)
glimpse(qnat_posto)
## Rows: 32,141
## Columns: 4
## $ date <date> 1931-01-02, 1931-01-03, 1931-01-04, 1931-01-05, 1931-01-06, …
## $ code_stn <dbl> 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 7…
## $ qnat <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ name_stn <chr> "G.B. MUNHOZ", "G.B. MUNHOZ", "G.B. MUNHOZ", "G.B. MUNHOZ", "…
summary(qnat_posto)
## date code_stn qnat name_stn
## Min. :1931-01-02 Min. :74 Min. : 69.58 Length:32141
## 1st Qu.:1953-01-01 1st Qu.:74 1st Qu.: 300.02 Class :character
## Median :1975-01-01 Median :74 Median : 524.20 Mode :character
## Mean :1975-01-01 Mean :74 Mean : 730.61
## 3rd Qu.:1996-12-31 3rd Qu.:74 3rd Qu.: 958.57
## Max. :2018-12-31 Max. :74 Max. :7832.77
## NA's :13605
magnif7(stn_data = select(qnat_posto, date, qnat))
## # A tibble: 8 × 2
## indice statistic
## <chr> <dbl>
## 1 lam1 736.
## 2 tau2 0.42
## 3 tau3 0.35
## 4 tau4 0.18
## 5 ar1 0.982
## 6 amplitude 0.17
## 7 phase 262.
## 8 bfi 0.413
by_stn <- qnat_data %>%
group_by(code_stn) %>%
nest()
Fração mensal da vazão anual
# https://jcoliver.github.io/learn-r/008-ggplot-dendrograms-and-heatmaps.html
# glimpse(by_stn[["data"]][[1]])
q_mon_clim <- by_stn %>%
unnest(cols = "data") %>%
group_by(code_stn, month = lubridate::month(date)) %>%
select(-name_stn) %>%
summarise(q_med = mean(qnat, na.rm = TRUE))
## `summarise()` has grouped output by 'code_stn'. You can override using the
## `.groups` argument.
q_ann_clim <- q_mon_clim %>%
summarise(q_tot = sum(q_med))
q_mon_clim <- q_mon_clim %>%
full_join(q_ann_clim) %>%
mutate(q_frac = q_med/q_tot * 100) %>%
ungroup() %>%
mutate(code_stn = as.factor(code_stn),
month = as.factor(month))
## Joining, by = "code_stn"
psych::describe(q_mon_clim) %>%
relocate(skew, kurtosis)
## skew kurtosis vars n mean sd median trimmed mad
## code_stn* 0.00 -1.20 1 1044 44.00 25.13 44.00 44.00 32.62
## month* 0.00 -1.22 2 1044 6.50 3.45 6.50 6.50 4.45
## q_med 6.10 46.86 3 1044 1192.13 3189.00 246.79 480.65 294.39
## q_tot 4.34 21.38 4 1044 14305.57 32676.29 3515.14 6265.35 3980.50
## q_frac 0.59 -0.32 5 1044 8.33 3.81 7.44 8.08 3.79
## min max range se
## code_stn* 1.00 87.00 86.00 0.78
## month* 1.00 12.00 11.00 0.11
## q_med 6.13 35961.74 35955.61 98.70
## q_tot 139.20 226833.52 226694.31 1011.31
## q_frac 1.11 20.30 19.19 0.12
Agrupamento dos postos
qlong <- select(q_mon_clim, code_stn, month, q_frac) %>%
pivot_wider(names_from = "month",
values_from = "q_frac",
names_prefix = "qfrac_"
)
qlong_scaled <- qlong
qlong_scaled[,-1] <- scale(qlong_scaled[,-1])
# Run clustering
qmat <- as.matrix(qlong_scaled[, -1])
rownames(qmat) <- qlong_scaled$code_stn
clustd <- dist(x = qmat)
cd <- hclust(d = clustd, method="ward.D2")
#cd <- hclust(d = dist(x = qmat))
# optclus <- sapply(2:20,
# function(i)
# summary(cluster::silhouette(cutree(cd, k = i), clustd))$avg.width
# )
# optclus
# which.max(optclus) # 2
q_dendro <- as.dendrogram(cd)
# Create dendro
dendro_plot <- ggdendro::ggdendrogram(data = q_dendro, rotate = TRUE)+
theme(axis.text.y = element_text(size = 8))
# Preview the plot
dendro_plot
# linhas da ordem dos postos
q_order <- order.dendrogram(q_dendro)
# ordem dos postos
as.integer(as.vector(qlong_scaled$code_stn[q_order]))
## [1] 73 71 72 77 78 74 76 111 93 220 102 286 101 216 92 94 98 215 277
## [20] 287 275 279 291 99 295 281 296 115 61 63 158 145 190 247 149 119 161 259
## [39] 294 266 120 121 237 243 240 242 47 117 134 196 144 283 188 255 205 209 33
## [58] 31 32 201 25 206 24 169 251 1 211 6 14 17 18 246 34 245 197 125
## [77] 130 254 155 156 278 257 253 191 270 271 273
Heat map da fração mensal da vazão anual com postos ordenados pelo agrupamento.
q_mon_clim_trans <- q_mon_clim %>%
mutate(code_stn = factor(x = code_stn,
levels = qlong_scaled$code_stn[q_order],
ordered = TRUE),
q_frac = round((q_frac/2))*2
)
# cbind(t = q_mon_clim_trans$q_frac, o = q_mon_clim$q_frac)
cols <- c("firebrick1", "lightpink3", "lightsteelblue3",
"cadetblue1", "cornflowerblue","blue", "navyblue")
q_mon_clim_trans %>%
ggplot(aes(x = month, y = code_stn)) +
geom_tile(aes(fill = q_frac), colour = "white") +
#geom_tile(aes(fill = factor(q_frac)), colour = "white") +
scale_x_discrete(expand = c(0,0)) +
theme_bw() +
#scale_fill_viridis_c()
#scale_fill_viridis_c(guide = "legend")
scale_fill_gradientn( "Vazão (% da média anual)",
colours = cols,
#guide = "legend",
breaks = seq(0, 20, by = 2)
) +
#scale_fill_manual(values = cols) +
theme(legend.title = element_text(angle = 90, vjust = 1),
legend.key.height = unit(1.5, units = "cm")
)
# scale_fill_distiller(
# palette='RdYlBu',
# direction = 1,
# breaks = seq(0, 20, by = 2),
# #guide = "legend"
# )
library(heatmaply)
## Loading required package: plotly
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## Loading required package: viridis
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
##
## viridis_pal
##
## ======================
## Welcome to heatmaply version 1.3.0
##
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
##
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags:
## https://stackoverflow.com/questions/tagged/heatmaply
## ======================
index_cols <- c(8:12, 1:7) + 1
qdata <- as.data.frame(qlong[, index_cols])
names(qdata) <- tolower(month.abb[index_cols-1])
lut_postos <- qnat_data %>%
select(contains("stn")) %>%
distinct()
postos <- lut_postos$name_stn
postos <- setNames(postos, lut_postos$code_stn)
rownames(qdata) <- postos[as.character(qlong$code_stn)]
heatmaply(qdata,
k_row = 4,
Colv = FALSE,
#k_col = 3,
#scale = "column"
scale_fill_gradient_fun = scale_fill_gradientn( "Vazão \n (% da média anual)",
colours = cols,
#guide = "legend",
breaks = seq(0, 20, by = 2)
),
fontsize_row = 6
#seriate = "mean",
#seriate = "OLO"
#seriate = "GW"
#seriate = "none"
)
https://bcgov.github.io/fasstr/articles/fasstr.html
Será relevante considerar a diferenças nos períodos dos anos hidrológicos por posto?
check_season_plots <- qnat_data %>%
rename("Date" = date) %>%
fasstr::plot_daily_stats(
values = "qnat",
groups = "code_stn",
start_year = 1991,
end_year = 2010,
log_discharge = TRUE,
add_year = 2001,
complete_years = TRUE,
include_title = TRUE
)
codes <- names(check_season_plots) %>%
readr::parse_number()
lookup <- qnat_data %>%
select(contains("stn")) %>%
distinct() %>%
slice(order(codes)) %>%
arrange(code_stn)
plot_l <- map(seq_along(codes),
function(i) {
# i = 1
nm <- filter(lookup, code_stn == codes[i]) %>%
pull(name_stn)
check_season_plots[[i]] + ggtitle(paste0("Posto: ", nm))
})
plot_l
seven_stats <- by_stn %>%
#ungroup() %>%
mutate(stats = purrr::map(data, ~.x %>% select(date, qnat) %>% magnif7))
seven_stats_tidy <- seven_stats %>%
select(stats) %>%
unnest(cols = "stats") %>%
pivot_wider(names_from = indice, values_from = statistic) %>%
ungroup()
## Adding missing grouping variables: `code_stn`
seven_stats_tidy
## # A tibble: 87 × 9
## code_stn lam1 tau2 tau3 tau4 ar1 amplitude phase bfi
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 128. 0.36 0.39 0.22 0.957 0.88 38.5 0.595
## 2 6 893. 0.37 0.36 0.19 0.974 0.91 41.8 0.570
## 3 14 52.8 0.38 0.37 0.21 0.944 0.86 44.4 0.551
## 4 17 1798. 0.35 0.34 0.17 0.985 0.96 48.2 0.603
## 5 18 2041. 0.34 0.33 0.16 0.986 0.97 49.3 0.609
## 6 24 454. 0.42 0.39 0.22 0.961 0.88 44.6 0.531
## 7 25 286. 0.37 0.37 0.21 0.943 0.88 46.8 0.577
## 8 31 1450. 0.38 0.35 0.19 0.975 0.95 49.0 0.568
## 9 32 1533. 0.37 0.35 0.18 0.976 0.96 49.5 0.571
## 10 33 2447. 0.36 0.32 0.16 0.978 1 50.5 0.587
## # … with 77 more rows
saveRDS(seven_stats_tidy, file = here("output", "seven_stats.RDS"))